#!/usr/bin/env bash
# Build the AXPY CUDA test case by running each stage of clang's CUDA
# pipeline by hand: device compile -> ptxas -> fatbinary -> host compile -> link.

# Stop at the first failing command and treat unset variables as errors, so a
# broken stage cannot feed stale artifacts into the next one.
# (No pipelines are used in this script, so `pipefail` is not needed.)
set -eu

# Report which clang++ (if any) is first on PATH. The build commands use the
# binaries under $LLVM_OBJ_PATH, so a missing clang++ is only a warning.
command -v clang++ || echo "warning: clang++ not found on PATH" >&2

# Tool locations and the test case to build. Each value may be overridden from
# the environment (e.g. TEST_CASE=foo); when unset or empty, the original
# hard-coded defaults are used.
#   LLVM_OBJ_PATH - LLVM build tree providing bin/clang-21 and lib/clang/21
#   CUDA_PATH     - CUDA toolkit root (ptxas, fatbinary, libdevice, headers)
#   TEST_CASE     - base name of the source file under test/ (without .cu)
# NOTE: the compile commands pass -target-sdk-version=12.6, so an overriding
# CUDA_PATH should point at a compatible toolkit.
LLVM_OBJ_PATH="${LLVM_OBJ_PATH:-/data/hongyi/cudaflow/third_party/llvm/build}"
CUDA_PATH="${CUDA_PATH:-/usr/local/cuda-12}"
TEST_CASE="${TEST_CASE:-axpy}"

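# Compile the CUDA source with a single driver invocation (kept for reference;
# `-###` makes the driver print the sub-commands instead of running them)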
# clang++ -### test/$TEST_CASE.cu -o test/$TEST_CASE.out --cuda-gpu-arch=sm_80 \
#     -L/usr/local/cuda-12/lib64 \
#     -lcudart_static -ldl -lrt -pthread

# Compile the CUDA source step by step
# Step 1: device-side compile.
# Runs the clang -cc1 frontend for the nvptx64-nvidia-cuda triple
# (-fcuda-is-device, -target-cpu sm_80) and emits assembly (-S) to
# ./test/$TEST_CASE-sm_80.s, linking libdevice in as builtin bitcode.
# `-mllvm -print-after-all` is passed to LLVM, and stdout+stderr are both
# redirected to pass.log -- so compiler diagnostics end up in that file too.
# Because nothing is printed to the terminal, the exit status is checked
# explicitly and the script stops instead of feeding a stale .s file to ptxas.
"$LLVM_OBJ_PATH/bin/clang-21" -cc1 -triple nvptx64-nvidia-cuda -aux-triple x86_64-unknown-linux-gnu -S \
        -dumpdir "test/$TEST_CASE.out-" -disable-free -clear-ast-before-backend \
        -disable-llvm-verifier -discard-value-names -main-file-name "$TEST_CASE.cu" \
        -mrelocation-model static -mframe-pointer=all -fno-rounding-math \
        -no-integrated-as -aux-target-cpu x86-64 -fcuda-is-device \
        -mllvm -enable-memcpyopt-without-libcalls \
        -fno-threadsafe-statics -fcuda-allow-variadic-functions \
        -mlink-builtin-bitcode "$CUDA_PATH/nvvm/libdevice/libdevice.10.bc" \
        -target-sdk-version=12.6 -target-cpu sm_80 -target-feature +ptx85 -debugger-tuning=gdb \
        -fno-dwarf-directory-asm -fdebug-compilation-dir=/data/hongyi/cudaflow \
        -resource-dir "$LLVM_OBJ_PATH/lib/clang/21" \
        -internal-isystem "$LLVM_OBJ_PATH/lib/clang/21/include/cuda_wrappers" \
        -include __clang_cuda_runtime_wrapper.h \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13 \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/x86_64-linux-gnu/c++/13 \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/backward \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13 \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/x86_64-linux-gnu/c++/13 \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/backward \
        -internal-isystem "$LLVM_OBJ_PATH/lib/clang/21/include" \
        -internal-isystem /usr/local/include \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include \
        -internal-externc-isystem /usr/include/x86_64-linux-gnu \
        -internal-externc-isystem /include \
        -internal-externc-isystem /usr/include \
        -internal-isystem "$CUDA_PATH/include" \
        -internal-isystem "$LLVM_OBJ_PATH/lib/clang/21/include" \
        -internal-isystem /usr/local/include \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include \
        -internal-externc-isystem /usr/include/x86_64-linux-gnu \
        -internal-externc-isystem /include \
        -internal-externc-isystem /usr/include \
        -fdeprecated-macro -fno-autolink -ferror-limit 19 -fmessage-length=186 --offload-new-driver -pthread \
        -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -fcolor-diagnostics -cuid=196f7c47be2c6202 \
        -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o "./test/$TEST_CASE-sm_80.s" -x cuda "test/$TEST_CASE.cu" \
        -mllvm -print-after-all > pass.log 2>&1 \
        || { echo "error: device compilation of test/$TEST_CASE.cu failed; see pass.log" >&2; exit 1; }

# Step 2: assemble the device assembly produced by the device compile into
# ./test/$TEST_CASE-sm_80.o for sm_80 using ptxas (64-bit, optimization
# level 0). Abort on failure so fatbinary never packages a stale object.
"$CUDA_PATH/bin/ptxas" -m64 -O0 --gpu-name sm_80 \
        --output-file "./test/$TEST_CASE-sm_80.o" \
        "./test/$TEST_CASE-sm_80.s" \
        || { echo "error: ptxas failed for ./test/$TEST_CASE-sm_80.s" >&2; exit 1; }

# Step 3: wrap the sm_80 device object into ./test/$TEST_CASE.fatbin with
# fatbinary. Abort on failure so the host compile never embeds a stale fatbin.
"$CUDA_PATH/bin/fatbinary" -64 --create "./test/$TEST_CASE.fatbin" \
        "--image=profile=sm_80,file=./test/$TEST_CASE-sm_80.o" \
        || { echo "error: fatbinary failed for ./test/$TEST_CASE-sm_80.o" >&2; exit 1; }

# Step 4: host-side compile.
# Runs the clang -cc1 frontend for x86_64-unknown-linux-gnu on the same .cu
# file, embedding ./test/$TEST_CASE.fatbin via -fcuda-include-gpubinary, and
# emits a PIE-compatible object (-emit-obj) at ./test/$TEST_CASE.o.
# Abort on failure so the link step never consumes a stale object.
"$LLVM_OBJ_PATH/bin/clang-21" -cc1 -triple x86_64-unknown-linux-gnu \
        -target-sdk-version=12.6 -fcuda-allow-variadic-functions \
        -aux-triple nvptx64-nvidia-cuda -emit-obj -dumpdir "test/$TEST_CASE.out-" -disable-free \
        -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name "$TEST_CASE.cu" \
        -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on \
        -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic \
        -debugger-tuning=gdb -fdebug-compilation-dir=/data/hongyi/cudaflow -fcoverage-compilation-dir=/data/hongyi/cudaflow \
        -resource-dir "$LLVM_OBJ_PATH/lib/clang/21" \
        -internal-isystem "$LLVM_OBJ_PATH/lib/clang/21/include/cuda_wrappers" \
        -include __clang_cuda_runtime_wrapper.h \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13 \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/x86_64-linux-gnu/c++/13 \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/backward \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13 \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/x86_64-linux-gnu/c++/13 \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/backward \
        -internal-isystem "$LLVM_OBJ_PATH/lib/clang/21/include" \
        -internal-isystem /usr/local/include \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include \
        -internal-externc-isystem /usr/include/x86_64-linux-gnu \
        -internal-externc-isystem /include \
        -internal-externc-isystem /usr/include \
        -internal-isystem "$LLVM_OBJ_PATH/lib/clang/21/include" \
        -internal-isystem /usr/local/include \
        -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include \
        -internal-externc-isystem /usr/include/x86_64-linux-gnu \
        -internal-externc-isystem /include \
        -internal-externc-isystem /usr/include \
        -internal-isystem "$CUDA_PATH/include" \
        -fdeprecated-macro -ferror-limit 19 -fmessage-length=186 --offload-new-driver -pthread -fgnuc-version=4.2.1 \
        -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -fcolor-diagnostics \
        -fcuda-include-gpubinary "./test/$TEST_CASE.fatbin" \
        -cuid=196f7c47be2c6202 -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 \
        -o "./test/$TEST_CASE.o" -x cuda "test/$TEST_CASE.cu" \
        || { echo "error: host compilation of test/$TEST_CASE.cu failed" >&2; exit 1; }

# Step 5: link.
# clang-linker-wrapper is given /usr/bin/ld as the host linker (--linker-path)
# together with a PIE link line: CRT start/end objects, library search paths
# (including $CUDA_PATH/lib64), the host object, the static CUDA runtime and
# the system libraries. The result is written to test/$TEST_CASE.out.
# Exit non-zero with a message if the link fails.
"$LLVM_OBJ_PATH/bin/clang-linker-wrapper" --should-extract=sm_80 --host-triple=x86_64-unknown-linux-gnu \
        --linker-path=/usr/bin/ld -z relro --hash-style=gnu --eh-frame-hdr -m elf_x86_64 -pie -dynamic-linker /lib64/ld-linux-x86-64.so.2 \
        -o "test/$TEST_CASE.out" /lib/x86_64-linux-gnu/Scrt1.o /lib/x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/13/crtbeginS.o \
        -L"$CUDA_PATH/lib64" -L/usr/lib/gcc/x86_64-linux-gnu/13 -L/usr/lib/gcc/x86_64-linux-gnu/13/../../../../lib64 -L/lib/x86_64-linux-gnu \
        -L/lib/../lib64 -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/lib -L/usr/lib "./test/$TEST_CASE.o" \
        -lcudart_static -ldl -lrt -lstdc++ -lm -lgcc_s -lgcc -lpthread -lc -lgcc_s -lgcc \
        /usr/lib/gcc/x86_64-linux-gnu/13/crtendS.o /lib/x86_64-linux-gnu/crtn.o \
        || { echo "error: linking test/$TEST_CASE.out failed" >&2; exit 1; }